import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.express as px
import seaborn as sns
from matplotlib import style
import missingno as msno
from IPython.core.interactiveshell import InteractiveShell
# reading data
df=pd.read_csv('Downloads/Project 5/sales_data.csv')
d=df.copy(deep=True)
d
| Order Date | Order ID | Product | Product_ean | Category | Purchase Address | Quantity Ordered | Price Each | Cost price | turnover | margin | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1/22/2019 21:25 | 141234 | iPhone | 5.640000e+12 | Phones | 944 Walnut St, Boston, MA 02215 | 1 | 700.00 | 231.0000 | 700.00 | 469.0000 |
| 1 | 1/28/2019 14:15 | 141235 | Lightning Charging Cable | 5.560000e+12 | Accessories | 185 Maple St, Portland, OR 97035 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 2 | 1/17/2019 13:33 | 141236 | Wired Headphones | 2.110000e+12 | Accessories | 538 Adams St, San Francisco, CA 94016 | 2 | 11.99 | 5.9950 | 23.98 | 11.9900 |
| 3 | 1/5/2019 20:33 | 141237 | 27in FHD Monitor | 3.070000e+12 | NaN | 738 10th St, Los Angeles, CA 90001 | 1 | 149.99 | 97.4935 | 149.99 | 52.4965 |
| 4 | 1/25/2019 11:59 | 141238 | Wired Headphones | 9.690000e+12 | Accessories | 387 10th St, Austin, TX 73301 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 185961 | 12/11/2019 20:58 | 319666 | Lightning Charging Cable | 6.550000e+12 | Accessories | 14 Madison St, San Francisco, CA 94016 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 185962 | 12/1/2019 12:01 | 319667 | AA Batteries (4-pack) | 5.350000e+12 | Accessories | 549 Willow St, Los Angeles, CA 90001 | 2 | 3.84 | 1.9200 | 7.68 | 3.8400 |
| 185963 | 12/9/2019 6:43 | 319668 | Vareebadd Phone | 2.670000e+12 | Accessories | 273 Wilson St, Seattle, WA 98101 | 1 | 400.00 | 132.0000 | 400.00 | 268.0000 |
| 185964 | 12/3/2019 10:39 | 319669 | Wired Headphones | 5.220000e+12 | Accessories | 778 River St, Dallas, TX 75001 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 |
| 185965 | 12/21/2019 21:45 | 319670 | Bose SoundSport Headphones | 8.080000e+12 | Accessories | 747 Chestnut St, Los Angeles, CA 90001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
185966 rows × 11 columns
d.sample(10)
| Order Date | Order ID | Product | Product_ean | Category | Purchase Address | Quantity Ordered | Price Each | Cost price | turnover | margin | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 135276 | 10/26/2019 14:39 | 271020 | Bose SoundSport Headphones | 1.980000e+12 | Accessories | 958 1st St, Boston, MA 02215 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 120628 | 9/21/2019 8:56 | 256960 | AA Batteries (4-pack) | 6.190000e+12 | Accessories | 453 Center St, San Francisco, CA 94016 | 1 | 3.84 | 1.9200 | 3.84 | 1.9200 |
| 166348 | 12/16/2019 11:26 | 300803 | Lightning Charging Cable | 4.640000e+12 | Accessories | 555 Church St, San Francisco, CA 94016 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 177118 | 12/2/2019 13:18 | 311171 | Bose SoundSport Headphones | 7.720000e+12 | Accessories | 56 Washington St, Los Angeles, CA 90001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 62277 | 5/28/2019 3:01 | 200928 | AA Batteries (4-pack) | 5.380000e+12 | Accessories | 622 South St, Boston, MA 02215 | 1 | 3.84 | 1.9200 | 3.84 | 1.9200 |
| 25080 | 3/3/2019 17:22 | 165260 | 34in Ultrawide Monitor | 6.400000e+12 | Appliances | 716 Washington St, San Francisco, CA 94016 | 1 | 379.99 | 125.3967 | 379.99 | 254.5933 |
| 118766 | 9/27/2019 11:24 | 255152 | AA Batteries (4-pack) | 4.330000e+12 | Accessories | 235 Wilson St, Austin, TX 73301 | 1 | 3.84 | 1.9200 | 3.84 | 1.9200 |
| 121423 | 9/21/2019 12:31 | 257728 | 27in 4K Gaming Monitor | 8.180000e+12 | Appliances | 733 Walnut St, New York City, NY 10001 | 1 | 389.99 | 128.6967 | 389.99 | 261.2933 |
| 126973 | 10/20/2019 14:21 | 263054 | Bose SoundSport Headphones | 2.030000e+12 | Accessories | 20 Ridge St, Seattle, WA 98101 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 18766 | 2/16/2019 19:40 | 159208 | Bose SoundSport Headphones | 3.410000e+12 | Accessories | 688 Washington St, New York City, NY 10001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
d.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 185966 entries, 0 to 185965 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order Date 185966 non-null object 1 Order ID 185966 non-null int64 2 Product 185966 non-null object 3 Product_ean 185966 non-null float64 4 Category 185366 non-null object 5 Purchase Address 185966 non-null object 6 Quantity Ordered 185966 non-null int64 7 Price Each 185734 non-null float64 8 Cost price 185966 non-null float64 9 turnover 185966 non-null float64 10 margin 185966 non-null float64 dtypes: float64(5), int64(2), object(4) memory usage: 15.6+ MB
d.describe()
| Order ID | Product_ean | Quantity Ordered | Price Each | Cost price | turnover | margin | |
|---|---|---|---|---|---|---|---|
| count | 185966.000000 | 1.859660e+05 | 185966.000000 | 185734.000000 | 185966.000000 | 185966.000000 | 185966.000000 |
| mean | 230414.910914 | 5.509176e+12 | 1.124388 | 184.040854 | 69.670555 | 185.496050 | 115.292602 |
| std | 51513.524171 | 2.598419e+12 | 0.442801 | 332.712540 | 109.427028 | 332.928676 | 225.233281 |
| min | 141234.000000 | 1.000000e+12 | 1.000000 | 2.990000 | 1.495000 | 2.990000 | 1.495000 |
| 25% | 185828.250000 | 3.250000e+12 | 1.000000 | 11.950000 | 5.975000 | 11.950000 | 5.975000 |
| 50% | 230368.500000 | 5.510000e+12 | 1.000000 | 14.950000 | 7.475000 | 14.950000 | 7.475000 |
| 75% | 275032.000000 | 7.770000e+12 | 1.000000 | 150.000000 | 97.500000 | 150.000000 | 52.500000 |
| max | 319670.000000 | 1.000000e+13 | 9.000000 | 1700.000000 | 561.000000 | 3400.000000 | 2278.000000 |
Nulls using missingno
InteractiveShell.ast_node_interactivity = "all"
msno.bar(d)
<Axes: >
msno.matrix(df.sort_values(by='Order ID'))
<Axes: >
msno.heatmap(d,cmap='GnBu')
<Axes: >
Duplicated values
d[d.duplicated(keep=False)]
| Order Date | Order ID | Product | Product_ean | Category | Purchase Address | Quantity Ordered | Price Each | Cost price | turnover | margin | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | 1/26/2019 12:16 | 141240 | 27in 4K Gaming Monitor | 5.170000e+12 | Appliances | 979 Park St, Los Angeles, CA 90001 | 1 | 389.99 | 128.6967 | 389.99 | 261.2933 |
| 7 | 1/5/2019 12:04 | 141241 | USB-C Charging Cable | 8.050000e+12 | NaN | 181 6th St, San Francisco, CA 94016 | 1 | 11.95 | 5.9750 | 11.95 | 5.9750 |
| 8 | 1/1/2019 10:30 | 141242 | Bose SoundSport Headphones | 1.510000e+12 | Accessories | 867 Willow St, Los Angeles, CA 90001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 9 | 1/22/2019 21:20 | 141243 | Apple Airpods Headphones | 1.390000e+12 | Accessories | 657 Johnson St, San Francisco, CA 94016 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 10 | 1/7/2019 11:29 | 141244 | Apple Airpods Headphones | 4.330000e+12 | Accessories | 492 Walnut St, San Francisco, CA 94016 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 11 | 1/31/2019 10:12 | 141245 | Macbook Pro Laptop | 1.170000e+12 | electronics | 322 6th St, San Francisco, CA 94016 | 1 | 1700.00 | 561.0000 | 1700.00 | 1139.0000 |
| 12 | 1/9/2019 18:57 | 141246 | AAA Batteries (4-pack) | 4.440000e+12 | Accessories | 618 7th St, Los Angeles, CA 90001 | 3 | 2.99 | 1.4950 | 8.97 | 4.4850 |
| 13 | 1/26/2019 12:16 | 141240 | 27in 4K Gaming Monitor | 5.170000e+12 | Appliances | 979 Park St, Los Angeles, CA 90001 | 1 | 389.99 | 128.6967 | 389.99 | 261.2933 |
| 14 | 1/5/2019 12:04 | 141241 | USB-C Charging Cable | 8.050000e+12 | NaN | 181 6th St, San Francisco, CA 94016 | 1 | 11.95 | 5.9750 | 11.95 | 5.9750 |
| 15 | 1/1/2019 10:30 | 141242 | Bose SoundSport Headphones | 1.510000e+12 | Accessories | 867 Willow St, Los Angeles, CA 90001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 16 | 1/22/2019 21:20 | 141243 | Apple Airpods Headphones | 1.390000e+12 | Accessories | 657 Johnson St, San Francisco, CA 94016 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 17 | 1/7/2019 11:29 | 141244 | Apple Airpods Headphones | 4.330000e+12 | Accessories | 492 Walnut St, San Francisco, CA 94016 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 18 | 1/31/2019 10:12 | 141245 | Macbook Pro Laptop | 1.170000e+12 | electronics | 322 6th St, San Francisco, CA 94016 | 1 | 1700.00 | 561.0000 | 1700.00 | 1139.0000 |
| 19 | 1/9/2019 18:57 | 141246 | AAA Batteries (4-pack) | 4.440000e+12 | Accessories | 618 7th St, Los Angeles, CA 90001 | 3 | 2.99 | 1.4950 | 8.97 | 4.4850 |
| 90539 | 7/6/2019 16:53 | 228015 | AA Batteries (4-pack) | 8.850000e+12 | Accessories | 162 Ridge St, Boston, MA 02215 | 1 | 3.84 | 1.9200 | 3.84 | 1.9200 |
| 90540 | 7/6/2019 16:53 | 228015 | AA Batteries (4-pack) | 8.850000e+12 | Accessories | 162 Ridge St, Boston, MA 02215 | 1 | 3.84 | 1.9200 | 3.84 | 1.9200 |
| 108023 | 8/13/2019 20:13 | 244840 | iPhone | 7.390000e+12 | Phones | 817 Cedar St, Dallas, TX 75001 | 1 | 700.00 | 231.0000 | 700.00 | 469.0000 |
| 108024 | 8/11/2019 9:42 | 244841 | AA Batteries (4-pack) | 4.440000e+12 | Accessories | 130 Jefferson St, San Francisco, CA 94016 | 2 | 3.84 | 1.9200 | 7.68 | 3.8400 |
| 108025 | 8/16/2019 11:12 | 244842 | USB-C Charging Cable | 4.140000e+12 | Accessories | 364 Cedar St, Atlanta, GA 30301 | 1 | 11.95 | 5.9750 | 11.95 | 5.9750 |
| 108026 | 8/7/2019 15:55 | 244843 | Wired Headphones | 6.420000e+12 | Accessories | 895 Hickory St, New York City, NY 10001 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 |
| 108027 | 8/1/2019 16:25 | 244844 | Lightning Charging Cable | 9.940000e+12 | Accessories | 29 Center St, Boston, MA 02215 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 108028 | 8/31/2019 12:48 | 244845 | Apple Airpods Headphones | 4.740000e+12 | Accessories | 11 Park St, Atlanta, GA 30301 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 108029 | 8/13/2019 17:52 | 244846 | Bose SoundSport Headphones | 9.760000e+12 | Accessories | 31 Spruce St, New York City, NY 10001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 108030 | 8/12/2019 19:56 | 244847 | Vareebadd Phone | 2.700000e+12 | Accessories | 183 Willow St, Los Angeles, CA 90001 | 1 | 400.00 | 132.0000 | 400.00 | 268.0000 |
| 108031 | 8/1/2019 20:29 | 244848 | Lightning Charging Cable | 6.240000e+12 | Accessories | 360 Johnson St, Boston, MA 02215 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 108032 | 8/13/2019 20:13 | 244840 | iPhone | 7.390000e+12 | Phones | 817 Cedar St, Dallas, TX 75001 | 1 | 700.00 | 231.0000 | 700.00 | 469.0000 |
| 108033 | 8/11/2019 9:42 | 244841 | AA Batteries (4-pack) | 4.440000e+12 | Accessories | 130 Jefferson St, San Francisco, CA 94016 | 2 | 3.84 | 1.9200 | 7.68 | 3.8400 |
| 108034 | 8/16/2019 11:12 | 244842 | USB-C Charging Cable | 4.140000e+12 | Accessories | 364 Cedar St, Atlanta, GA 30301 | 1 | 11.95 | 5.9750 | 11.95 | 5.9750 |
| 108035 | 8/7/2019 15:55 | 244843 | Wired Headphones | 6.420000e+12 | Accessories | 895 Hickory St, New York City, NY 10001 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 |
| 108036 | 8/1/2019 16:25 | 244844 | Lightning Charging Cable | 9.940000e+12 | Accessories | 29 Center St, Boston, MA 02215 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
| 108037 | 8/31/2019 12:48 | 244845 | Apple Airpods Headphones | 4.740000e+12 | Accessories | 11 Park St, Atlanta, GA 30301 | 1 | 150.00 | 97.5000 | 150.00 | 52.5000 |
| 108038 | 8/13/2019 17:52 | 244846 | Bose SoundSport Headphones | 9.760000e+12 | Accessories | 31 Spruce St, New York City, NY 10001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 |
| 108039 | 8/12/2019 19:56 | 244847 | Vareebadd Phone | 2.700000e+12 | Accessories | 183 Willow St, Los Angeles, CA 90001 | 1 | 400.00 | 132.0000 | 400.00 | 268.0000 |
| 108040 | 8/1/2019 20:29 | 244848 | Lightning Charging Cable | 6.240000e+12 | Accessories | 360 Johnson St, Boston, MA 02215 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 |
d.drop_duplicates(inplace=True)
d[d.duplicated(keep=False)]
| Order Date | Order ID | Product | Product_ean | Category | Purchase Address | Quantity Ordered | Price Each | Cost price | turnover | margin |
|---|
Converting types
d['Product']=d['Product'].astype('string')
d['Order Date']=pd.to_datetime(d['Order Date'])
d['Purchase Address']=d['Purchase Address'].astype('string')
d.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 185949 entries, 0 to 185965 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order Date 185949 non-null datetime64[ns] 1 Order ID 185949 non-null int64 2 Product 185949 non-null string 3 Product_ean 185949 non-null float64 4 Category 185949 non-null object 5 Purchase Address 185949 non-null string 6 Quantity Ordered 185949 non-null int64 7 Price Each 185949 non-null float64 8 Cost price 185949 non-null float64 9 turnover 185949 non-null float64 10 margin 185949 non-null float64 dtypes: datetime64[ns](1), float64(5), int64(2), object(1), string(2) memory usage: 21.1+ MB
Detecting outliers in each column using boxplot in plotly
# for i in d.columns:
# fig = px.box(d, y=i)
# fig.write_html(f'outliers{i}.html')
# fig.show()
fixing missing values
d.isnull().sum()
Order Date 0 Order ID 0 Product 0 Product_ean 0 Category 599 Purchase Address 0 Quantity Ordered 0 Price Each 232 Cost price 0 turnover 0 margin 0 dtype: int64
d['Category'].fillna('Accessories',inplace=True)
d['Price Each'].fillna(d['Price Each'].median(),inplace=True)
d.isnull().sum()
Order Date 0 Order ID 0 Product 0 Product_ean 0 Category 0 Purchase Address 0 Quantity Ordered 0 Price Each 0 Cost price 0 turnover 0 margin 0 dtype: int64
d.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 185949 entries, 0 to 185965 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order Date 185949 non-null datetime64[ns] 1 Order ID 185949 non-null int64 2 Product 185949 non-null string 3 Product_ean 185949 non-null float64 4 Category 185949 non-null object 5 Purchase Address 185949 non-null object 6 Quantity Ordered 185949 non-null int64 7 Price Each 185949 non-null float64 8 Cost price 185949 non-null float64 9 turnover 185949 non-null float64 10 margin 185949 non-null float64 dtypes: datetime64[ns](1), float64(5), int64(2), object(2), string(1) memory usage: 17.0+ MB
d.describe()
| Order ID | Product_ean | Quantity Ordered | Price Each | Cost price | turnover | margin | |
|---|---|---|---|---|---|---|---|
| count | 185949.000000 | 1.859490e+05 | 185949.000000 | 185949.000000 | 185949.000000 | 185949.000000 | 185949.000000 |
| mean | 230417.582299 | 5.509192e+12 | 1.124384 | 183.825650 | 69.668947 | 185.491894 | 115.290031 |
| std | 51512.875322 | 2.598407e+12 | 0.442794 | 332.550136 | 109.424372 | 332.920400 | 225.227642 |
| min | 141234.000000 | 1.000000e+12 | 1.000000 | 2.990000 | 1.495000 | 2.990000 | 1.495000 |
| 25% | 185831.000000 | 3.250000e+12 | 1.000000 | 11.950000 | 5.975000 | 11.950000 | 5.975000 |
| 50% | 230368.000000 | 5.510000e+12 | 1.000000 | 14.950000 | 7.475000 | 14.950000 | 7.475000 |
| 75% | 275036.000000 | 7.770000e+12 | 1.000000 | 150.000000 | 97.500000 | 150.000000 | 52.500000 |
| max | 319670.000000 | 1.000000e+13 | 9.000000 | 1700.000000 | 561.000000 | 3400.000000 | 2278.000000 |
Count categories
fig = px.scatter(d,width =600,height = 600, x="Quantity Ordered", y="Cost price", color="Category" )
fig.show()
plt.figure(figsize=(10, 6))
sns.kdeplot(data=d, x='Cost price', hue='Category', fill=True)
plt.xlabel('Cost price')
plt.ylabel('Density')
plt.title('Price Distribution by Category')
plt.show()
<Figure size 1000x600 with 0 Axes>
<Axes: xlabel='Cost price', ylabel='Density'>
Text(0.5, 0, 'Cost price')
Text(0, 0.5, 'Density')
Text(0.5, 1.0, 'Price Distribution by Category')
sns.pairplot(d[['Cost price', 'Quantity Ordered', 'Category']],hue='Category' , diag_kind='kde')
# plt.title('Pairwise Scatter Plots - Cost price, Quantity, and Product')
# plt.suptitle('Pairwise Scatter Plots - Sales, Quantity, and Age',y=1)
plt.show()
<seaborn.axisgrid.PairGrid at 0x16df1a07d50>
a) what is the relantion between Category and Cost price?
plt.figure(figsize=(8, 6))
sns.boxplot(y='Cost price', x='Category', data=d)
plt.xlabel(' Category')
plt.ylabel('Cost Sales')
plt.show()
<Figure size 800x600 with 0 Axes>
<Axes: xlabel='Category', ylabel='Cost price'>
Text(0.5, 0, ' Category')
Text(0, 0.5, 'Cost Sales')
b) what is the relation between Cost price , Category and Quantity?
plt.figure(figsize=(10, 6))
marital_type_sales = df.pivot_table(index='Quantity Ordered', columns='Category', values='Cost price', aggfunc='sum')
marital_type_sales.plot(kind='bar', stacked=True)
plt.xlabel('Quantity Ordered')
plt.ylabel('Cost price')
plt.legend(title='Category', bbox_to_anchor=(1, 1))
plt.show()
<Figure size 1000x600 with 0 Axes>
<Axes: xlabel='Quantity Ordered'>
Text(0.5, 0, 'Quantity Ordered')
Text(0, 0.5, 'Cost price')
<matplotlib.legend.Legend at 0x16d86c51cd0>
<Figure size 1000x600 with 0 Axes>
c) what is the correlation between features?
correlation_matrix = d.corr(numeric_only=True)
sns.heatmap(correlation_matrix, annot=True,
cmap='coolwarm'
)
plt.title('Correlation Matrix')
plt.show()
<Axes: >
Text(0.5, 1.0, 'Correlation Matrix')
d) what is the ratio between categories?
ratio= d.groupby('Category')['Category'].count()
plt.pie(ratio, labels=ratio.index,
autopct='%1.1f%%',wedgeprops=dict(width=0.5)
)
plt.title('Categories percentage')
plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()
([<matplotlib.patches.Wedge at 0x16d85c50150>, <matplotlib.patches.Wedge at 0x16d86d37350>, <matplotlib.patches.Wedge at 0x16d86d48e50>, <matplotlib.patches.Wedge at 0x16d86d4ac90>], [Text(-0.7135583623522399, 0.8371585653370511, 'Accessories'), Text(0.3756370504333411, -1.0338746569781754, 'Appliances'), Text(0.961878432899624, -0.5336570812071771, 'Phones'), Text(1.087785030634161, -0.1634739340942089, 'electronics')], [Text(-0.38921365219213083, 0.4566319447293006, '72.5%'), Text(0.2048929366000042, -0.5639316310790047, '16.2%'), Text(0.5246609633997948, -0.29108568065846024, '6.6%'), Text(0.593337289436815, -0.08916760041502303, '4.7%')])
Text(0.5, 1.0, 'Categories percentage')
(-1.0999956194338132, 1.0999997914016102, -1.0999985133508985, 1.0999982307209664)
e) what is the most products sold?
ratio= d.groupby('Product')['Quantity Ordered'].sum()
fig, ax = plt.subplots()
plt.bar(ratio.index, ratio.values)
plt.xticks(rotation=90)
plt.xlabel('Sales')
plt.ylabel('Amount')
plt.show()
<BarContainer object of 19 artists>
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [Text(0, 0, '20in Monitor'), Text(1, 0, '27in 4K Gaming Monitor'), Text(2, 0, '27in FHD Monitor'), Text(3, 0, '34in Ultrawide Monitor'), Text(4, 0, 'AA Batteries (4-pack)'), Text(5, 0, 'AAA Batteries (4-pack)'), Text(6, 0, 'Apple Airpods Headphones'), Text(7, 0, 'Bose SoundSport Headphones'), Text(8, 0, 'Flatscreen TV'), Text(9, 0, 'Google Phone'), Text(10, 0, 'LG Dryer'), Text(11, 0, 'LG Washing Machine'), Text(12, 0, 'Lightning Charging Cable'), Text(13, 0, 'Macbook Pro Laptop'), Text(14, 0, 'ThinkPad Laptop'), Text(15, 0, 'USB-C Charging Cable'), Text(16, 0, 'Vareebadd Phone'), Text(17, 0, 'Wired Headphones'), Text(18, 0, 'iPhone')])
Text(0.5, 0, 'Sales')
Text(0, 0.5, 'Amount')
f) what is the number of unique values in Price Each?
np.count_nonzero(d['Price Each'].unique())
17
g) what happend to cost price over time ?
fig = px.line(d, x='Order Date', y='Cost price', color='Product',markers=True)
fig.show()
h) how weekday sales is going?
weekday_sales = d['Order Date'].dt.day_name()
weekday_sales.value_counts().plot(kind='bar',
figsize=(10, 6))
plt.xlabel('Weekday')
plt.ylabel('Sales Count')
plt.title('Sales by Weekday')
plt.show()
<Axes: >
Text(0.5, 0, 'Weekday')
Text(0, 0.5, 'Sales Count')
Text(0.5, 1.0, 'Sales by Weekday')
i) how will be the relation & the best fit line between Price Each & Cost price?
fig = px.scatter(d,width =600,height = 600, x="Cost price", y="Price Each",
trendline="ols")
fig.show()
j)how will be Cost price Trend?
pt= d.pivot_table(index='Order Date',
columns='Category',
values='Cost price', aggfunc='sum')
pt.resample('M').sum().plot(kind='area', figsize=(10, 6))
plt.xlabel('Date')
plt.ylabel('Cost price')
plt.title('Cost price Trend by Category')
plt.show()
<Axes: xlabel='Order Date'>
Text(0.5, 0, 'Date')
Text(0, 0.5, 'Cost price')
Text(0.5, 1.0, 'Cost price Trend by Category')
c=d['Purchase Address'].tolist()
x=[]
for i in c:
q=re.split(",",i)
x.append(q[1])
d['city']=pd.Series(x)
d
| Order Date | Order ID | Product | Product_ean | Category | Purchase Address | Quantity Ordered | Price Each | Cost price | turnover | margin | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-01-22 21:25:00 | 141234 | iPhone | 5.640000e+12 | Phones | 944 Walnut St, Boston, MA 02215 | 1 | 700.00 | 231.0000 | 700.00 | 469.0000 | Boston |
| 1 | 2019-01-28 14:15:00 | 141235 | Lightning Charging Cable | 5.560000e+12 | Accessories | 185 Maple St, Portland, OR 97035 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 | Portland |
| 2 | 2019-01-17 13:33:00 | 141236 | Wired Headphones | 2.110000e+12 | Accessories | 538 Adams St, San Francisco, CA 94016 | 2 | 11.99 | 5.9950 | 23.98 | 11.9900 | San Francisco |
| 3 | 2019-01-05 20:33:00 | 141237 | 27in FHD Monitor | 3.070000e+12 | Accessories | 738 10th St, Los Angeles, CA 90001 | 1 | 149.99 | 97.4935 | 149.99 | 52.4965 | Los Angeles |
| 4 | 2019-01-25 11:59:00 | 141238 | Wired Headphones | 9.690000e+12 | Accessories | 387 10th St, Austin, TX 73301 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 | Austin |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 185961 | 2019-12-11 20:58:00 | 319666 | Lightning Charging Cable | 6.550000e+12 | Accessories | 14 Madison St, San Francisco, CA 94016 | 1 | 14.95 | 7.4750 | 14.95 | 7.4750 | NaN |
| 185962 | 2019-12-01 12:01:00 | 319667 | AA Batteries (4-pack) | 5.350000e+12 | Accessories | 549 Willow St, Los Angeles, CA 90001 | 2 | 3.84 | 1.9200 | 7.68 | 3.8400 | NaN |
| 185963 | 2019-12-09 06:43:00 | 319668 | Vareebadd Phone | 2.670000e+12 | Accessories | 273 Wilson St, Seattle, WA 98101 | 1 | 400.00 | 132.0000 | 400.00 | 268.0000 | NaN |
| 185964 | 2019-12-03 10:39:00 | 319669 | Wired Headphones | 5.220000e+12 | Accessories | 778 River St, Dallas, TX 75001 | 1 | 11.99 | 5.9950 | 11.99 | 5.9950 | NaN |
| 185965 | 2019-12-21 21:45:00 | 319670 | Bose SoundSport Headphones | 8.080000e+12 | Accessories | 747 Chestnut St, Los Angeles, CA 90001 | 1 | 99.99 | 49.9950 | 99.99 | 49.9950 | NaN |
185949 rows × 12 columns